% Start from a clean session: remove workspace variables, close figures and
% clear the command window. Plain `clear` is used instead of `clear all`,
% because `clear all` additionally purges compiled functions, persistent
% variables and breakpoints, which slows the run and is not needed here.
clear; close all; clc;

% Load data (one protein per row; the file must be on the MATLAB path or in
% the current folder)
data = readtable('Data_proteomics_Verhagen.xlsx');

% Define sectors. Each accumulator starts empty and receives table rows in
% the classification loop below.
Upt = []; UGlc = []; LGlc = []; Ferm = []; ESnk = []; Resp = []; Treh = [];
Grwt = []; Struc = [];

% Rows whose column 9 mentions 'Oxidative phosphorylation' are additionally
% collected here (these rows are also added to Resp).
oxphor = [];

% Extra sector to store proteins that have not been appointed to a sector
Rest = [];

% Assign every row of the table to exactly one sector. The rules are tried
% in order and the first match wins: rules on column 2 take priority over
% the rules on columns 7-9. Rows matching no rule end up in Rest.
% NOTE(review): column 2 presumably holds the gene/protein name and columns
% 7-9 the annotation levels CatA/CatB/CatC - confirm against the spreadsheet.

% Name fragments per sector (substring match on column 2)
uptGenes   = {'HXK', 'GLK', 'HXT'};
uglcGenes  = {'PGI', 'FBP', 'PFK'};
lglcGenes  = {'FBA', 'TPI', 'TDH', 'PGK', 'GPM', 'ENO', 'PYK', 'PCK'};
fermGenes  = {'PDC', 'ADH'};
esnkGenes  = {'GPD', 'RHR', 'HOR'};
respGenes  = {'PYC', 'PDA', 'PDB', 'PDX', 'LPD', 'CIT', 'MDH', 'ACO', ...
              'MLS', 'ICL', 'IDH', 'IDP', 'SDH', 'LCS', 'FUM', 'KGD', ...
              'LAT', 'ALD', 'ACS'};
trehGenes  = {'PGM', 'TPS', 'GSY', 'NTH', 'ATH', 'GPH'};
strucGenes = {'HSP', 'HRI', 'PKR', 'PERK', 'GCN2', 'YBH', 'RCK', 'RQC', ...
              'RCN', 'LSM', 'PIL', 'TMA', 'RDL', 'RTN', 'PST', 'STM'};

% Annotation rules for the growth sector (column 8: exact / substring match)
grwtExactB = {'Amino acid metabolism', 'Metabolism of other amino acids', ...
              'Lipid metabolism', 'Metabolism of cofactors and vitamins', ...
              'Nucleotide metabolism'};
grwtPartB  = {'biosynthesis', 'Cytoskeleton', 'metabolism'};

% Annotation rules for the structural sector
strucExactA = {'Not Included in Pathway or Brite', ...
               'Environmental Information Processing', ...
               'Cellular Processes'};
strucPartA  = {'signaling', 'Organismal Systems', ...
               'Protein families: signaling and cellular processes'};
strucPartB  = {'Protein kinases', 'Protein phosphatases', ...
               'Glycosyltransferases'};

for i = 1:size(data,1)
    % Read the fields of this row once; repeated table indexing is slow
    gene = data{i,2};
    catA = data{i,7};
    catB = data{i,8};
    catC = data{i,9};
    sectorRow = data(i, 1:9);

    if contains(gene, uptGenes)
        Upt = [Upt; sectorRow];
    elseif contains(gene, uglcGenes)
        UGlc = [UGlc; sectorRow];
    elseif contains(gene, lglcGenes)
        LGlc = [LGlc; sectorRow];
    elseif contains(gene, fermGenes)
        Ferm = [Ferm; sectorRow];
    elseif contains(gene, esnkGenes)
        ESnk = [ESnk; sectorRow];
    elseif contains(gene, respGenes)
        Resp = [Resp; sectorRow];
    elseif contains(gene, trehGenes)
        Treh = [Treh; sectorRow];
    elseif contains(gene, strucGenes)
        Struc = [Struc; sectorRow];

    % Sort the remaining proteins based on columns 7-9
    elseif contains(catC, 'Oxidative phosphorylation') ...
            || strcmp(catC, 'TCA Cycle')
        Resp = [Resp; sectorRow];
        % Keep a separate copy (all columns) of the oxidative phosphorylation rows
        if contains(catC, 'Oxidative phosphorylation')
            oxphor = [oxphor; data(i,:)];
        end
    elseif contains(catA, 'Genetic Information Processing') ...
            || any(strcmp(catB, grwtExactB)) ...
            || contains(catB, grwtPartB) ...
            || contains(catC, 'biosynthesis') ...
            || strcmp(catA, '')
        Grwt = [Grwt; sectorRow];
    elseif any(strcmp(catA, strucExactA)) ...
            || contains(catA, strucPartA) ...
            || contains(catB, strucPartB) ...
            || contains(catC, 'Peptidases')
        Struc = [Struc; sectorRow];
    else
        % Unassigned proteins: keep only columns 1, 2 and 7-9
        Rest = [Rest; data(i, [1,2,7,8,9])];
    end
end

%% Determine proteome allocation chemostat

% Collect the values of column 3 of every sector, dropping missing entries.
% NOTE(review): column 3 is presumably the chemostat signal - confirm
% against the spreadsheet.
% Sector order: Upt - UGlc - LGlc - Ferm - ESnk - Resp - Treh - Grwt - Struc
% NOTE: from here on `data` no longer holds the loaded table but a 9x1 cell
% array with one numeric column vector per sector.
sectors = {Upt, UGlc, LGlc, Ferm, ESnk, Resp, Treh, Grwt, Struc};
data = cell(numel(sectors), 1);
for i = 1:numel(sectors)
    sec = sectors{i};
    if isempty(sec)
        % A sector without any assigned protein is still [] and cannot be
        % indexed by column; it simply contributes no signal.
        data{i} = zeros(0, 1);
    else
        data{i} = table2array(rmmissing(sec(:, 3)));
    end
end

% Calculate signal per sector and signal of the total proteome
sign_sec = zeros(1, numel(data));
sign_tot = 0;
for i = 1:numel(data)
    sign_sec(i) = sum(data{i});
    sign_tot = sign_tot + sign_sec(i);
end

% Calculate fractions
% Upt - UGlc - LGlc - Ferm - ESnk - Resp - Treh - Grwt - Struc
phi_ch = sign_sec./sign_tot;

%% Show results in proteomaps

% Sector labels, in the same order as the entries of phi_ch
names = { ...
    'Upt', 'Uglc', 'Lglc', ...
    'Ferm', 'ESnk', 'Resp', ...
    'Treh', 'Grwt', 'Struc'};

% Text scaling factor handed to Proteomap
textscale = 0.3;

% Draw the proteomap of the sector fractions in figure window 1
figure(1)
Proteomap(phi_ch, names, textscale)